{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>宝鸡文理学院</td>\n",
       "      <td>基于AI的固沙多功能扎草生态机器人</td>\n",
       "      <td>人工智能应用</td>\n",
       "      <td>张桦  都娜娜  粟建翔</td>\n",
       "      <td>胡静波</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>宝鸡文理学院</td>\n",
       "      <td>基于分时控制的森林安全防护多步态机器人</td>\n",
       "      <td>人工智能应用</td>\n",
       "      <td>范开裕  房旭  赵雨竹</td>\n",
       "      <td>王欢</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>宝鸡文理学院</td>\n",
       "      <td>基于机械视觉的分类和处理垃圾桶的设计</td>\n",
       "      <td>物联网应用</td>\n",
       "      <td>王俊豪  王旭  罗益</td>\n",
       "      <td>周新淳  钱郁</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>宝鸡文理学院</td>\n",
       "      <td>e家居--基于物联网的智能房车控制系统</td>\n",
       "      <td>物联网应用</td>\n",
       "      <td>刘楠  孙龙桥  冯新洋</td>\n",
       "      <td>张磊</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>甘肃民族师范学院</td>\n",
       "      <td>初中化学四大基本反应的讲解</td>\n",
       "      <td>微课与教学辅助</td>\n",
       "      <td>王强  李彦武  张丽丽</td>\n",
       "      <td>县小平  张明文</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>95</td>\n",
       "      <td>兰州信息科技学院</td>\n",
       "      <td>基于AI自识别购物车</td>\n",
       "      <td>人工智能应用</td>\n",
       "      <td>王东辉  南利斌  王瑞瑞</td>\n",
       "      <td>袁英年  朱明敏</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>96</td>\n",
       "      <td>兰州信息科技学院</td>\n",
       "      <td>基于AI的智能管道清洗机器人</td>\n",
       "      <td>物联网应用</td>\n",
       "      <td>杜佳珉  杨良全  袁梓铭</td>\n",
       "      <td>谢黎明  陆学斌</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>97</td>\n",
       "      <td>陇东学院</td>\n",
       "      <td>自动识别字幕翻译播放器</td>\n",
       "      <td>软件应用与开发</td>\n",
       "      <td>杨成成  赵杰  杨锐</td>\n",
       "      <td>郑璐</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>98</td>\n",
       "      <td>陇东学院</td>\n",
       "      <td>基于群人工智能算法的函数优化器</td>\n",
       "      <td>软件应用与开发</td>\n",
       "      <td>秦世豪  杨稳权  任尚才</td>\n",
       "      <td>李娜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>99</td>\n",
       "      <td>陇东学院</td>\n",
       "      <td>基于YOLO算法口罩检测系统</td>\n",
       "      <td>软件应用与开发</td>\n",
       "      <td>曹虎斌  黄福玲</td>\n",
       "      <td>姚云霞</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0         0                    1        2              3  \\\n",
       "0            0    宝鸡文理学院    基于AI的固沙多功能扎草生态机器人   人工智能应用   张桦  都娜娜  粟建翔   \n",
       "1            1    宝鸡文理学院  基于分时控制的森林安全防护多步态机器人   人工智能应用   范开裕  房旭  赵雨竹   \n",
       "2            2    宝鸡文理学院   基于机械视觉的分类和处理垃圾桶的设计    物联网应用    王俊豪  王旭  罗益   \n",
       "3            3    宝鸡文理学院  e家居--基于物联网的智能房车控制系统    物联网应用   刘楠  孙龙桥  冯新洋   \n",
       "4            4  甘肃民族师范学院        初中化学四大基本反应的讲解  微课与教学辅助   王强  李彦武  张丽丽   \n",
       "..         ...       ...                  ...      ...            ...   \n",
       "95          95  兰州信息科技学院           基于AI自识别购物车   人工智能应用  王东辉  南利斌  王瑞瑞   \n",
       "96          96  兰州信息科技学院       基于AI的智能管道清洗机器人    物联网应用  杜佳珉  杨良全  袁梓铭   \n",
       "97          97      陇东学院          自动识别字幕翻译播放器  软件应用与开发    杨成成  赵杰  杨锐   \n",
       "98          98      陇东学院      基于群人工智能算法的函数优化器  软件应用与开发  秦世豪  杨稳权  任尚才   \n",
       "99          99      陇东学院       基于YOLO算法口罩检测系统  软件应用与开发       曹虎斌  黄福玲   \n",
       "\n",
       "           4  \n",
       "0        胡静波  \n",
       "1         王欢  \n",
       "2    周新淳  钱郁  \n",
       "3         张磊  \n",
       "4   县小平  张明文  \n",
       "..       ...  \n",
       "95  袁英年  朱明敏  \n",
       "96  谢黎明  陆学斌  \n",
       "97        郑璐  \n",
       "98        李娜  \n",
       "99       姚云霞  \n",
       "\n",
       "[100 rows x 6 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pdfplumber\n",
    "import pandas as pd\n",
    "read_path=\"C:/Users/YC111/Desktop/作业/bspdf.pdf\"\n",
    "pdf=pdfplumber.open(read_path)\n",
    "result_df = pd.DataFrame()\n",
    "for page in pdf.pages:\n",
    "    table = []\n",
    "for i in range(len(pdf.pages)):\n",
    "    if(i+2)<32:\n",
    "      page = pdf.pages[i + 1]\n",
    "      table.extend(page.extract_table())\n",
    "    df_detail = pd.DataFrame(table[1:], columns=table[0])\n",
    "    result_df = pd.concat([df_detail, result_df], ignore_index=True)\n",
    "result_df.dropna(axis=1, how='all', inplace=True)\n",
    "result_df.columns = ['学校', '项目名称','大类', '参赛学生', '指导老师']\n",
    "df=pd.DataFrame(table)\n",
    "df.to_csv(\"C:/Users/YC111/Desktop/作业/2022年西北赛区推送全国赛名单.csv\")\n",
    "fpath =\"C:/Users/YC111/Desktop/作业/2022年西北赛区推送全国赛名单.csv\"\n",
    "bsqyb= pd.read_csv(fpath)\n",
    "bsqyb.head(n=100)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.10 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "09738f47427dba2c99ec68f9759744be2c477c434791ae3d2ff19909b29a18a8"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
